library(stringr)
library(ggplot2)
# Work from the directory that holds the benchmark logs, so the relative log
# file names used further down resolve against it.
setwd("/home/song/project/going/neo4j/demo/logs")

# Listing of the log directory; not referenced by the rest of the visible
# script, handy for interactive inspection.
files <- list.files(path = ".")
# head(files)


# Parse one benchmark log file into a data frame of read/write counters.
#
# The reader and writer thread counts are taken from the file name, which is
# matched against "<digits>-<anything>-read-write<R>x<W>.log". The file content
# is then scanned for every "... INFO[main] time:100s, read:<n>,write:<m>,read
# range:0" entry and the two numbers of each entry are captured.
#
# Args:
#   fileName: path of the log file to read.
#   dbName:   label stored in the "db" column of every returned row.
#
# Returns:
#   A data frame with one row per matching log entry and the columns
#     X2     - captured read counter (numeric)
#     X3     - captured write counter (numeric)
#     db     - dbName
#     thread - R + W from the file name (NA, with a warning, when the file
#              name does not match the expected shape)
#   A log without any matching entry yields a zero-row data frame that still
#   has these columns.
extraVarFromFile <- function(fileName, dbName) {
  # Thread counts come from the file name; only the first match is used.
  nameMatch <- str_match_all(
    fileName, "\\d+-.+-read-write(\\d+)x(\\d+).log"
  )[[1]]
  if (nrow(nameMatch) == 0L) {
    warning(
      "File name '", fileName,
      "' does not look like '...-read-write<R>x<W>.log'; thread set to NA",
      call. = FALSE
    )
    totalCount <- NA_real_
  } else {
    totalCount <- as.numeric(nameMatch[1, 2]) + as.numeric(nameMatch[1, 3])
  }

  content <- readChar(fileName, file.info(fileName)$size)
  patternLines <- c(
    "\\d+ INFO\\[main\\] time:100s, read:(\\d+),write:(\\d+),read range:0"
#    "(\\d+) INFO\\[main\\] \\[Time Usage\\] Total:(\\d+)ms, count:(\\d+), Average time:(\\d+\\.\\d+)ms\r\n"
  )
  pattern <- paste(patternLines, collapse = "")

  # Column 1 of the match matrix is the full match; keep the capture groups
  # only. drop = FALSE keeps the matrix shape for zero or one matching entry.
  captures <- str_match_all(content, pattern)[[1]][, -1, drop = FALSE]

  parsed <- as.data.frame(captures, stringsAsFactors = FALSE)
  # Name the columns explicitly (X2, X3, ...) instead of relying on the names
  # data.frame() happens to generate; downstream code looks up X2 and X3.
  names(parsed) <- sprintf("X%d", seq_len(ncol(captures)) + 1L)
  # Always convert to numeric: depending on the R version the captured text
  # arrives as character or as factor, and as.numeric() handles the character
  # columns produced here in either case.
  parsed[] <- lapply(parsed, as.numeric)

  # rep_len() keeps the assignment valid for a zero-row result as well.
  parsed[["db"]] <- rep_len(dbName, nrow(parsed))
  parsed[["thread"]] <- rep_len(totalCount, nrow(parsed))
  parsed
}

# Parse every log file in fileList for one database and stack the results.
#
# Args:
#   fileList: log file names, each handed to extraVarFromFile().
#   dbName:   database label passed through to extraVarFromFile().
#
# Returns:
#   One data frame holding the rows of all files, with the rows of the last
#   file first and those of the first file last, plus a "tx" column equal to
#   X2 + X3.
readFiles <- function(fileList, dbName) {
  # Zero-row seed: keeps the X2/X3 columns available when fileList is empty.
  seed <- data.frame(X2 = numeric(), X3 = numeric())

  perFile <- lapply(
    unname(fileList),
    function(logFile) extraVarFromFile(logFile, dbName)
  )

  # Each newly read file goes in front of what was read before, hence rev().
  stacked <- do.call(rbind, c(rev(perFile), list(seed)))

  stacked[["tx"]] <- stacked[["X2"]] + stacked[["X3"]]
  stacked
}


# Neo4j runs: one log per "<readers>x<writers>" thread configuration.
fileList <- paste0(
  "20160303-neo4j-read-write",
  c("5x5", "10x10", "15x15", "25x25", "35x35", "45x45", "55x55"),
  ".log"
)
T.tmp1 <- readFiles(fileList, "Neo4j")
# T.tmp1['X2'] = T.tmp1['X2'] - 793585

# TGraph runs: same thread configurations as above.
fileList <- paste0(
  "20160303-tgraph-read-write",
  c("5x5", "10x10", "15x15", "25x25", "35x35", "45x45", "55x55"),
  ".log"
)
T.tmp2 <- readFiles(fileList, "TGraph")

# Combined data for plotting: TGraph rows first, then the Neo4j rows.
# T.tmp <- rbind(T.tmp1, T.tmp2)
T.tmp <- rbind(T.tmp2, T.tmp1)

# x: thread column, y: tx / 100; one line-and-point series per database,
# told apart by colour and point shape.
P.4 <- ggplot(
  data = T.tmp,
  mapping = aes(x = thread, y = tx / 100, colour = db, shape = db)
) +
  geom_line() +
  geom_point(size = 3) +
  theme_bw() +
  theme(
    # Legend sits in the bottom-right corner of the plot, without a title
    # and without key backgrounds.
    legend.justification = c(1, 0),
    legend.position = c(1, 0),
    legend.title = element_blank(),
    legend.key = element_blank()
    # axis.title = element_text(size = 14)  # , face = "bold"
  ) +
  # Disabled axis scale overrides, kept for reference:
  # scale_x_continuous(limits = c(0, 81500), labels = comma) +
  # scale_y_continuous(limits = c(0, 40000), labels = f2si) +
  # scale_y_continuous(trans = 'log') +
  labs(
    # title = "Compare: committed transaction count",
    x = "Number of threads",
    y = "Number of transactions",
    fill = ""
  )


# Clear the workspace, keeping only the plot objects P.1 to P.4.
rm(list = setdiff(ls(), c("P.1", "P.2", "P.3", "P.4")))
